<?php
/**
 * QueryList
 * @link            https://github.com/jae-jae/QueryList
 * https://github.com/jae-jae/QueryList/wiki/API-Elements#texts
 * @version         4.0.0
 *
 */

namespace App\Services\Spider;
use App\Console\Commands\XgSpiderComm;
use EasyWeChat\Kernel\Support\File;
use QL\QueryList;
use GuzzleHttp\Client;

// Map of spider error codes to their (Chinese) human-readable messages.
// Declared in the App\Services\Spider namespace; it is not referenced by the
// code visible in this file.
//   10001 - the request to the Baidu URL returned an empty page (failure)
//   10002 - the request to the Baidu URL returned content but nothing matched (failure)
//   10003 - any other error
const ERROR = [
    '10001'=>'请求百度地址打开空,失败',
    '10002'=>'请求百度地址有内容,无匹配,失败',
    '10003'=>'其它',
];
/*
Use this URL; it works without any cookie restriction:
https://m.baidu.com/sf/vsearch?pd=wenda_tab&word=%E7%99%BD%E4%BA%BA%E9%A5%AD%E6%98%AF%E4%BB%80%E4%B9%88%E6%A2%97&tn=vsearch&sa=vs_tab_d&lid=10730041240042650848&ms=1&rqid=10730041240042650848&rfrom=1023384d&rchannel=1024094n
*/

/**
 * Spider driver that scrapes Baidu's mobile vertical search (`pd=wenda_tab`)
 * for a keyword and forwards the extracted title / description pairs to
 * XgSpiderComm::CallSpiderResult().
 *
 * Every page request is sent through an HTTP proxy taken from an in-memory
 * pool, which is refilled on demand from a third-party proxy API.
 */
class BaiduDrive extends God
{
    /**
     * Pool of proxy addresses returned by the proxy API.
     * Entries are concatenated after "http://" when used, so they are
     * presumably "host:port" strings -- TODO confirm against the API response.
     *
     * @var array
     */
    private $ipArray = [];

    /**
     * Keyword currently being crawled (raw, not URL-encoded).
     *
     * @var string
     */
    protected $keyword = '';

    /**
     * Crawl result pages 1 through 5 for the given keyword.
     *
     * Each page is fetched and parsed before the next one is requested,
     * because get_aysncpage() waits on its promise.
     *
     * @param string $keyword Raw search keyword.
     * @return void
     */
    public function claws(string $keyword)
    {
        $this->keyword = $keyword;

        // Percent-encode the keyword so characters such as '&', '#', '+' or
        // spaces cannot corrupt or truncate the query string.
        $word = rawurlencode($keyword);

        for ($pn = 1; $pn <= 5; $pn++) {
            $api = "https://m.baidu.com/sf/vsearch?pd=wenda_tab&word={$word}&tn=vsearch&sa=vs_tab&lid=9327839806699296556&ms=1&pn={$pn}";
            $this->get_aysncpage($api, $pn);
        }
    }

    /**
     * Fetch a batch of proxies from the proxy API and append them to the pool.
     *
     * @return bool True when the response was decoded and appended, false when
     *              the request failed, the API reported an error, or the
     *              payload was not a JSON array/object.
     */
    private function get_ipjson_array()
    {
        // NOTE(review): the API key is hard-coded in the URL; consider moving
        // it to configuration.
        $url = 'http://gev.qydailiip.com/api/?apikey=977e3d0aa43582bc0ecabccecdd1c26d0b34fe3d&num=60&type=json&line=win&proxy_type=secret&end_time=0';

        $curl = curl_init();
        curl_setopt_array($curl, [
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_ENCODING => '',
            CURLOPT_MAXREDIRS => 10,
            CURLOPT_TIMEOUT => 1000,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
            CURLOPT_CUSTOMREQUEST => 'GET',
        ]);
        $response = curl_exec($curl);
        curl_close($curl);

        // curl_exec() returns false on transport failure; an empty body is
        // equally useless.
        if (!is_string($response) || $response === '') {
            return false;
        }

        // The API signals failures with a body containing "ERROR".
        if (strpos($response, 'ERROR') !== false) {
            return false;
        }

        $proxies = json_decode($response, true);
        if (!is_array($proxies)) {
            // Malformed or unexpected payload: nothing to add to the pool.
            return false;
        }

        foreach ($proxies as $proxy) {
            $this->ipArray[] = $proxy;
        }

        return true;
    }

    /**
     * Take one proxy address out of the pool, refilling the pool first when it
     * holds fewer than 10 entries.
     *
     * Makes at most 10 attempts; each refill is preceded by a 5 second pause
     * (presumably to respect the proxy API's rate limit -- TODO confirm).
     *
     * @return mixed|null A proxy entry from the pool, or null when none could
     *                    be obtained.
     */
    public function get_ipaddress()
    {
        for ($attempt = 0; $attempt < 10; $attempt++) {
            if (count($this->ipArray) < 10) {
                sleep(5);
                $this->get_ipjson_array();
            }

            // Any remaining entry is usable, including the very last one.
            if (count($this->ipArray) > 0) {
                return array_pop($this->ipArray);
            }
        }

        return null;
    }

    /**
     * Fetch one result page through a proxy and hand the HTML to
     * parase_content() when the response is a non-empty HTTP 200.
     *
     * Although the request is issued with sendAsync(), the promise is awaited
     * immediately, so this method blocks until the page has been processed.
     * Any exception raised while fetching or parsing is swallowed on purpose
     * so that one bad proxy or page does not abort the whole crawl.
     *
     * @param string $api Fully built search URL.
     * @param int    $pn  Page number, passed through to the parser.
     * @return null
     */
    protected function get_aysncpage($api, $pn)
    {
        try {
            $ip = $this->get_ipaddress();
            if (!is_string($ip) || $ip === '') {
                // No usable proxy: building "http://" would only yield an
                // invalid proxy URL, so skip this page.
                return null;
            }

            // NOTE(review): 'verify' => false disables TLS certificate checks.
            $client = new \GuzzleHttp\Client([
                'verify' => false,
                'timeout' => 1000,
                'proxy' => 'http://' . $ip,
            ]);
            $request = new \GuzzleHttp\Psr7\Request('GET', $api);

            $promise = $client->sendAsync($request)->then(function ($resp) use ($pn) {
                // getBody() returns a stream object (always truthy); cast it
                // so emptiness is tested on the actual content and the parser
                // receives a plain string.
                $html = (string) $resp->getBody();
                if ($resp->getStatusCode() === 200 && $html !== '') {
                    self::parase_content($html, $this->keyword, $pn);
                }
            });
            $promise->wait();
        } catch (\Exception $e) {
            return null;
        }

        return null;
    }

    /**
     * Extract title / description pairs from a result page and forward them,
     * in random order, to XgSpiderComm::CallSpiderResult().
     *
     * Does nothing when either the titles or the descriptions are missing.
     * Echoes a progress line to standard output when content was found.
     *
     * @param mixed  $body    Page HTML (string or string-castable stream).
     * @param string $keyword Keyword the page was fetched for.
     * @param int    $pn      Page number.
     * @return null
     */
    protected static function parase_content($body, $keyword, $pn)
    {
        $ql = QueryList::html((string) $body)->removeHead();

        $titles = $ql->find('.c-title-text')->texts()->all();
        $descs = $ql->find('.c-text-line-text')->texts()->all();
        if (!$titles || !$descs) {
            return null;
        }

        echo PHP_EOL."找到内容页面{$pn}   ";

        $newSpirder = [];
        foreach ($titles as $k => $title) {
            $newSpirder[] = [
                'title' => $title,
                // The two selectors may match a different number of nodes;
                // fall back to null instead of reading an undefined key.
                'short_article' => $descs[$k] ?? null,
            ];
        }
        shuffle($newSpirder);
        XgSpiderComm::CallSpiderResult($newSpirder, $keyword, $pn);

        return null;
    }
}
